here::set_here()
## File .here already exists in /Users/koenvandenberge/Dropbox/research/brainStat/hbcRegenGithub/scripts
suppressPackageStartupMessages({
library(slingshot)
library(tradeSeq)
library(SingleCellExperiment)
library(cowplot)
library(rgl)
library(clusterExperiment)
library(RColorBrewer)
library(aggregation)
library(ggplot2)
library(pheatmap)
library(wesanderson)
library(UpSetR)
library(gridExtra)
library(NMF)
library(nichenetr)
library(Seurat)
library(dplyr)
library(tidyverse)
library(circlize)
library(ComplexHeatmap)
library(NMF)
library(msigdbr)
library(fgsea)
})
colby <- function(values, g=4){
if(is(values, "character")){
cols <- as.numeric(as.factor(values))
return(cols)
}
if(is(values, "factor")){
ncl <- nlevels(values)
if(ncl > 9){
colpal <- c(RColorBrewer::brewer.pal(9, 'Set1'), wesanderson::wes_palette("Darjeeling1", n=ncl-9, type="continuous"))
} else {
colpal <- RColorBrewer::brewer.pal(9, 'Set1')
}
cols <- colpal[as.numeric(values)]
return(cols)
}
if(is(values, "numeric")){
pal <- wesanderson::wes_palette("Zissou1", n=g, type="continuous")
gg <- Hmisc::cut2(values, g=g)
if(nlevels(gg) < g){
nl <- nlevels(gg)
if(nl == 2) pal <- pal[c(1,g)]
}
cols <- pal[gg]
return(cols)
}
}
sds <- readRDS("../data/finalTrajectory/sling.rds")
counts <- readRDS("../data/finalTrajectory/counts_noResp_noMV.rds")
counts <- round(counts)
sce <- readRDS("../data/finalTrajectory/sce_tradeSeq20200904.rds")
load("../data/ALL_TF.Rda")
clDatta <- readRDS("../data/finalTrajectory/dattaCl_noResp_noMV.rds")
plot3d(reducedDim(sds), aspect = 'iso', col=brewer.pal(9,'Set1')[clDatta], alpha=.6)
plot3d.SlingshotDataSet(sds, add=TRUE, col=c("black"), lwd=4)
library(nichenetr)
library(tidyverse)
ligand_target_matrix = readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds"))
lr_network = readRDS(url("https://zenodo.org/record/3260758/files/lr_network.rds"))
ligand_target_matrix_original <- ligand_target_matrix
lr_network_original <- lr_network
# convert human to mouse
ligand_target_matrix <- ligand_target_matrix_original
colnames(ligand_target_matrix) = ligand_target_matrix %>% colnames() %>% convert_human_to_mouse_symbols()
rownames(ligand_target_matrix) = ligand_target_matrix %>% rownames() %>% convert_human_to_mouse_symbols()
Sender population is regenerated HBC. Receiver population is activated HBC. We define an expressed gene is a gene being expressed (non-zero) in at least 10% of the cells in that cell type.
# expressed_genes_sender <- rownames(counts)[rowMeans(counts[, clDatta == "HBC"] > 0) > 0.05]
# expressed_genes_receiver <- rownames(counts)[rowMeans(counts[, clDatta == "HBC*"] > 0) > 0.05]
# length(expressed_genes_sender)
# length(expressed_genes_receiver)
expressed_genes_sender <- expressed_genes_receiver <- rownames(counts)
clDatta <- droplevels(clDatta)
clDatta <- relevel(clDatta, ref="HBC*")
library(glmGamPoi)
# fit <- glm_gp(counts,
# design = model.matrix(~ clDatta))
fit <- readRDS("~/tmp/glmGamFit.rds")
deRes <- test_de(fit, contrast = "clDattaHBC")
rownames(deRes) <- deRes$name
deGenes <- deRes$name[deRes$adj_pval <= 0.01]
deGenesOrd <- order(abs(deRes[deGenes, "lfc"]), decreasing=TRUE)
geneset_oi <- deGenes[1:500]
background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
lr_network <- lr_network_original
lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na()
ligands = lr_network %>% pull(from) %>% unique()
expressed_ligands = intersect(ligands,expressed_genes_sender)
receptors = lr_network %>% pull(to) %>% unique()
expressed_receptors = intersect(receptors,expressed_genes_receiver)
lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors)
head(lr_network_expressed)
## # A tibble: 6 x 4
## from to source database
## <chr> <chr> <chr> <chr>
## 1 Cxcl1 Cxcr2 kegg_cytokines kegg
## 2 Cxcl2 Cxcr2 kegg_cytokines kegg
## 3 Cxcl5 Cxcr2 kegg_cytokines kegg
## 4 Ppbp Cxcr2 kegg_cytokines kegg
## 5 Cxcl12 Cxcr4 kegg_cytokines kegg
## 6 Cx3cl1 Cx3cr1 kegg_cytokines kegg
potential_ligands = lr_network_expressed %>% pull(from) %>% unique()
head(potential_ligands)
## [1] "Cxcl1" "Cxcl2" "Cxcl5" "Ppbp" "Cxcl12" "Cx3cl1"
ligand_activities = predict_ligand_activities(geneset = geneset_oi,
background_expressed_genes = background_expressed_genes,
ligand_target_matrix = ligand_target_matrix,
potential_ligands = potential_ligands)
ligand_activities_hbcAct_hbc <- ligand_activities
ligand_activities %>% arrange(-pearson)
## # A tibble: 366 x 4
## test_ligand auroc aupr pearson
## <chr> <dbl> <dbl> <dbl>
## 1 Agrn 0.594 0.0453 0.0541
## 2 Hspg2 0.580 0.0418 0.0420
## 3 Nrg1 0.549 0.0421 0.0408
## 4 Lamb1 0.575 0.0438 0.0399
## 5 Ctf1 0.549 0.0417 0.0390
## 6 Il23a 0.554 0.0436 0.0383
## 7 Grn 0.563 0.0424 0.0372
## 8 Lamb2 0.560 0.0416 0.0371
## 9 Il24 0.558 0.0416 0.0365
## 10 Ptprt 0.558 0.0415 0.0359
## # … with 356 more rows
best_upstream_ligands = ligand_activities %>% top_n(20, pearson) %>% arrange(-pearson) %>% pull(test_ligand)
# Top 20 of most active ligands:
best_upstream_ligands
## [1] "Agrn" "Hspg2" "Nrg1" "Lamb1" "Ctf1" "Il23a" "Grn" "Lamb2"
## [9] "Il24" "Ptprt" "Jag2" "Tdgf1" "Camp" "Dkk1" "Tslp" "Dscam"
## [17] "Wnt10a" "Inhba" "Inhbb" "Pvr"
# threshold
p_hist_lig_activity = ggplot(ligand_activities, aes(x=pearson)) +
geom_histogram(color="black", fill="darkorange") +
# geom_density(alpha=.1, fill="orange") +
geom_vline(aes(xintercept=min(ligand_activities %>% top_n(20, pearson) %>% pull(pearson))), color="red", linetype="dashed", size=1) +
labs(x="ligand activity (PCC)", y = "# ligands") +
theme_classic()
p_hist_lig_activity
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()
active_ligand_target_links_df <- active_ligand_target_links_df[!is.na(active_ligand_target_links_df$target),]
active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25)
order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev()
order_targets = intersect(active_ligand_target_links_df$target, rownames(active_ligand_target_links)) %>% unique()
vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t()
p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized HBC-ligands","HBC* target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.005,0.01)) + theme(axis.text.x = element_text(face = "italic"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
color <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100)
breaks <- seq(0, 0.01, length=100)
breaks <- c(breaks, 0.05)
annoRowDf <- data.frame(meanExp = log1p(rowMeans(counts[rownames(vis_ligand_target), clDatta=="HBC"])))
rownames(annoRowDf) <- rownames(vis_ligand_target)
pheatmap::pheatmap(vis_ligand_target, cluster_rows=FALSE, cluster_cols=FALSE,
col=color, breaks=breaks, border_color = NA,
annotation_row = annoRowDf)
current_plot_df <- vis_ligand_target
active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()
cutoff_include_all_ligands = active_ligand_target_links_df$weight %>% quantile(0.66, na.rm=TRUE)
active_ligand_target_links_df_circos = active_ligand_target_links_df %>% filter(weight > cutoff_include_all_ligands)
ligands_to_remove = setdiff(active_ligand_target_links_df$ligand %>% unique(), active_ligand_target_links_df_circos$ligand %>% unique())
targets_to_remove = setdiff(active_ligand_target_links_df$target %>% unique(), active_ligand_target_links_df_circos$target %>% unique())
circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove)
logfc = deRes$lfc
logfc_filter = deRes$name[abs(logfc) >= 2]
circos_links = circos_links %>% filter(target %in% logfc_filter)
# logfc_sender = deRes$lfc[deRes$name %in% rownames(current_plot_df)]
# circos_links = circos_links %>% arrange(logfc[circos_links$target])
#
cdm_res = chordDiagram(circos_links, annotationTrack = "grid", preAllocateTracks = 1)
abs_max = max(logfc, na.rm = TRUE)
# https://jokergoo.github.io/circlize_book/book/legends.html#legends
# https://jokergoo.github.io/circlize_book/book/a-complex-example-of-chord-diagram.html
col_fun = colorRamp2(c(-abs_max, 0, abs_max), c("dodgerblue4", "white", "red"))
col_fun2 = colorRamp2(c(0, max(ligand_activities[, "pearson"])), c("white", "green4"))
lgd_links = Legend(at = c(-4, -2, 0, 2, 4), col_fun = col_fun, title_position = "topleft", title = "LogFC")
lgd_act = Legend(at = c(0, ceiling(100*max(ligand_activities[, "pearson"])))/100, col_fun = col_fun2, title_position = "topleft", title = "Ligand Activity")
lgd_list_vertical = packLegend(lgd_links, lgd_act)
ylim = get.cell.meta.data("ylim", sector.index = circos_links$ligand[1], track.index = 1)
y1 = ylim[2] - (ylim[2] - ylim[1])*0.9
y2 = ylim[2] - (ylim[2] - ylim[1])*0.75
y3 = ylim[2] - (ylim[2] - ylim[1])*0.74
y4 = ylim[2] - (ylim[2] - ylim[1])*0.59
for(i in seq_len(nrow(cdm_res))) {
if(cdm_res$value1[i] > 0) {
circos.rect(
cdm_res[i, "x1"], y1,
cdm_res[i, "x1"] - abs(cdm_res[i, "value1"]), y1 + (y2-y1)*0.7,
col = col_fun(min(5, max(-5, logfc[cdm_res$rn[i]]))),
border = col_fun(min(5, max(-5, logfc[cdm_res$rn[i]]))),
sector.index = cdm_res$rn[i], track.index = 1)
circos.rect(
cdm_res[i, "x1"], y3 + (y4-y3)*0.3,
cdm_res[i, "x1"] - abs(cdm_res[i, "value1"]), y4,
col = col_fun2(ligand_activities %>% filter(test_ligand == cdm_res$rn[i]) %>% .$pearson),
border = col_fun2(ligand_activities %>% filter(test_ligand == cdm_res$rn[i]) %>% .$pearson),
sector.index = cdm_res$rn[i], track.index = 1)
circos.rect(
cdm_res[i, "x2"], y1 + (y2-y1)*0.3,
cdm_res[i, "x2"] - abs(cdm_res[i, "value1"]), y2,
col = col_fun(min(5, max(-5, logfc[cdm_res$cn[i]]))),
border = col_fun(min(5, max(-5, logfc[cdm_res$cn[i]]))),
sector.index = cdm_res$cn[i], track.index = 1)
}
}
circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
xlim = get.cell.meta.data("xlim")
sector.name = get.cell.meta.data("sector.index")
circos.text(mean(xlim), y4 + .1, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5), cex = 0.5)
}, bg.border = NA)
draw(lgd_list_vertical, x = unit(0.95, "npc"), y = unit(0.85, "npc"), just = c("top", "right"))
circos.clear()
clDatta <- droplevels(clDatta)
clDatta <- relevel(clDatta, ref="HBC*")
library(glmGamPoi)
# fit <- glm_gp(counts,
# design = model.matrix(~ clDatta))
L <- matrix(0, nrow=ncol(fit$Beta), ncol=1)
rownames(L) <- colnames(fit$Beta)
L[,1] <- 1/5
deRes <- test_de(fit,
contrast = L)
rownames(deRes) <- deRes$name
# deGenes <- deRes$name[deRes$adj_pval <= 0.01 & abs(deRes$lfc) > log2(2)]
deGenes <- deRes$name[deRes$adj_pval <= 0.01]
deGenesOrd <- order(abs(deRes[deGenes, "lfc"]), decreasing=TRUE)
length(deGenes)
## [1] 11429
geneset_oi <- deGenes[deGenesOrd[1:500]]
background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
lr_network <- lr_network_original
lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na()
ligands = lr_network %>% pull(from) %>% unique()
expressed_ligands = intersect(ligands,expressed_genes_sender)
receptors = lr_network %>% pull(to) %>% unique()
expressed_receptors = intersect(receptors,expressed_genes_receiver)
lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors)
head(lr_network_expressed)
## # A tibble: 6 x 4
## from to source database
## <chr> <chr> <chr> <chr>
## 1 Cxcl1 Cxcr2 kegg_cytokines kegg
## 2 Cxcl2 Cxcr2 kegg_cytokines kegg
## 3 Cxcl5 Cxcr2 kegg_cytokines kegg
## 4 Ppbp Cxcr2 kegg_cytokines kegg
## 5 Cxcl12 Cxcr4 kegg_cytokines kegg
## 6 Cx3cl1 Cx3cr1 kegg_cytokines kegg
potential_ligands = lr_network_expressed %>% pull(from) %>% unique()
head(potential_ligands)
## [1] "Cxcl1" "Cxcl2" "Cxcl5" "Ppbp" "Cxcl12" "Cx3cl1"
ligand_activities = predict_ligand_activities(geneset = geneset_oi,
background_expressed_genes = background_expressed_genes,
ligand_target_matrix = ligand_target_matrix,
potential_ligands = potential_ligands)
ligand_activities_hbcAct_all <- ligand_activities
ligand_activities %>% arrange(-pearson)
## # A tibble: 366 x 4
## test_ligand auroc aupr pearson
## <chr> <dbl> <dbl> <dbl>
## 1 Tnf 0.507 0.0527 0.0837
## 2 Tgfb1 0.494 0.0501 0.0602
## 3 Il1b 0.500 0.0446 0.0590
## 4 Apoe 0.472 0.0397 0.0492
## 5 Ccl12 0.485 0.0465 0.0478
## 6 Has2 0.521 0.0429 0.0393
## 7 Il23a 0.516 0.0470 0.0384
## 8 Il18 0.513 0.0447 0.0382
## 9 Tgfb2 0.502 0.0480 0.0358
## 10 Il15 0.508 0.0411 0.0357
## # … with 356 more rows
best_upstream_ligands = ligand_activities %>% top_n(15, pearson) %>% arrange(-pearson) %>% pull(test_ligand)
# Top 20 of most active ligands:
best_upstream_ligands
## [1] "Tnf" "Tgfb1" "Il1b" "Apoe" "Ccl12" "Has2" "Il23a" "Il18"
## [9] "Tgfb2" "Il15" "Mpz" "App" "Adam17" "Csf2" "Il10"
# threshold
p_hist_lig_activity = ggplot(ligand_activities, aes(x=pearson)) +
geom_histogram(color="black", fill="darkorange") +
# geom_density(alpha=.1, fill="orange") +
geom_vline(aes(xintercept=min(ligand_activities %>% top_n(20, pearson) %>% pull(pearson))), color="red", linetype="dashed", size=1) +
labs(x="ligand activity (PCC)", y = "# ligands") +
theme_classic()
p_hist_lig_activity
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()
active_ligand_target_links_df <- active_ligand_target_links_df[!is.na(active_ligand_target_links_df$target),]
active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.25)
order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev()
order_targets = intersect(active_ligand_target_links_df$target, rownames(active_ligand_target_links)) %>% unique()
vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t()
p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","HBC* target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.005,0.01)) + theme(axis.text.x = element_text(face = "italic"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
color <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100)
breaks <- seq(0, 0.01, length=100)
breaks <- c(breaks, 0.05)
annoRowDf <- data.frame(meanExp = log1p(rowMeans(counts[rownames(vis_ligand_target), clDatta=="HBC"])))
rownames(annoRowDf) <- rownames(vis_ligand_target)
pheatmap::pheatmap(vis_ligand_target, cluster_rows=FALSE, cluster_cols=FALSE,
col=color, breaks=breaks, border_color = NA,
annotation_row = annoRowDf)
current_plot_df <- vis_ligand_target
active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()
cutoff_include_all_ligands = active_ligand_target_links_df$weight %>% quantile(0.66, na.rm=TRUE)
active_ligand_target_links_df_circos = active_ligand_target_links_df %>% filter(weight > cutoff_include_all_ligands)
ligands_to_remove = setdiff(active_ligand_target_links_df$ligand %>% unique(), active_ligand_target_links_df_circos$ligand %>% unique())
targets_to_remove = setdiff(active_ligand_target_links_df$target %>% unique(), active_ligand_target_links_df_circos$target %>% unique())
circos_links = active_ligand_target_links_df %>% filter(!target %in% targets_to_remove &!ligand %in% ligands_to_remove)
logfc = deRes$lfc
logfc_filter = deRes$name[abs(logfc) >= 2]
circos_links = circos_links %>% filter(target %in% logfc_filter)
# logfc_sender = deRes$lfc[deRes$name %in% rownames(current_plot_df)]
# circos_links = circos_links %>% arrange(logfc[circos_links$target])
#
cdm_res = chordDiagram(circos_links, annotationTrack = "grid", preAllocateTracks = 1)
## Note: The second link end is drawn out of sector 'Bcl2a1a'.
## Note: The second link end is drawn out of sector 'Il23a'.
abs_max = max(logfc, na.rm = TRUE)
# https://jokergoo.github.io/circlize_book/book/legends.html#legends
# https://jokergoo.github.io/circlize_book/book/a-complex-example-of-chord-diagram.html
col_fun = colorRamp2(c(-abs_max, 0, abs_max), c("dodgerblue4", "white", "red"))
col_fun2 = colorRamp2(c(0, max(ligand_activities[, "pearson"])), c("white", "green4"))
lgd_links = Legend(at = c(-4, -2, 0, 2, 4), col_fun = col_fun, title_position = "topleft", title = "LogFC")
lgd_act = Legend(at = c(0, ceiling(100*max(ligand_activities[, "pearson"])))/100, col_fun = col_fun2, title_position = "topleft", title = "Ligand Activity")
lgd_list_vertical = packLegend(lgd_links, lgd_act)
ylim = get.cell.meta.data("ylim", sector.index = circos_links$ligand[1], track.index = 1)
y1 = ylim[2] - (ylim[2] - ylim[1])*0.9
y2 = ylim[2] - (ylim[2] - ylim[1])*0.75
y3 = ylim[2] - (ylim[2] - ylim[1])*0.74
y4 = ylim[2] - (ylim[2] - ylim[1])*0.59
for(i in seq_len(nrow(cdm_res))) {
if(cdm_res$value1[i] > 0) {
circos.rect(
cdm_res[i, "x1"], y1,
cdm_res[i, "x1"] - abs(cdm_res[i, "value1"]), y1 + (y2-y1)*0.7,
col = col_fun(min(5, max(-5, logfc[cdm_res$rn[i]]))),
border = col_fun(min(5, max(-5, logfc[cdm_res$rn[i]]))),
sector.index = cdm_res$rn[i], track.index = 1)
circos.rect(
cdm_res[i, "x1"], y3 + (y4-y3)*0.3,
cdm_res[i, "x1"] - abs(cdm_res[i, "value1"]), y4,
col = col_fun2(ligand_activities %>% filter(test_ligand == cdm_res$rn[i]) %>% .$pearson),
border = col_fun2(ligand_activities %>% filter(test_ligand == cdm_res$rn[i]) %>% .$pearson),
sector.index = cdm_res$rn[i], track.index = 1)
circos.rect(
cdm_res[i, "x2"], y1 + (y2-y1)*0.3,
cdm_res[i, "x2"] - abs(cdm_res[i, "value1"]), y2,
col = col_fun(min(5, max(-5, logfc[cdm_res$cn[i]]))),
border = col_fun(min(5, max(-5, logfc[cdm_res$cn[i]]))),
sector.index = cdm_res$cn[i], track.index = 1)
}
}
## Note: 3 points are out of plotting region in sector 'Bcl2a1a', track
## '1'.
## Note: 3 points are out of plotting region in sector 'Il23a', track '1'.
circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
xlim = get.cell.meta.data("xlim")
sector.name = get.cell.meta.data("sector.index")
circos.text(mean(xlim), y4 + .1, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5), cex = 0.5)
}, bg.border = NA)
draw(lgd_list_vertical, x = unit(0.95, "npc"), y = unit(0.85, "npc"), just = c("top", "right"))
circos.clear()
prepare_ligand_target_visualization_kvdb <- function(ligand_target_df,
ligand_target_matrix,
cutoff = 0.25)
{
cutoff_include_all_ligands = ligand_target_df$weight %>%
quantile(cutoff, na.rm=TRUE)
ligand_target_matrix_oi = ligand_target_matrix
ligand_target_matrix_oi[ligand_target_matrix_oi < cutoff_include_all_ligands] = 0
ligand_target_vis = ligand_target_matrix_oi[ligand_target_df$target %>%
unique(), ligand_target_df$ligand %>% unique()]
dim(ligand_target_vis) = c(length(ligand_target_df$target %>%
unique()), length(ligand_target_df$ligand %>% unique()))
all_targets = ligand_target_df$target %>% unique()
all_ligands = ligand_target_df$ligand %>% unique()
rownames(ligand_target_vis) = all_targets
colnames(ligand_target_vis) = all_ligands
keep_targets = all_targets[ligand_target_vis %>% apply(1,
sum) > 0]
keep_ligands = all_ligands[ligand_target_vis %>% apply(2,
sum) > 0]
ligand_target_vis_filtered = ligand_target_vis[keep_targets,
keep_ligands]
if (is.matrix(ligand_target_vis_filtered)) {
rownames(ligand_target_vis_filtered) = keep_targets
colnames(ligand_target_vis_filtered) = keep_ligands
}
else {
dim(ligand_target_vis_filtered) = c(length(keep_targets),
length(keep_ligands))
rownames(ligand_target_vis_filtered) = keep_targets
colnames(ligand_target_vis_filtered) = keep_ligands
}
if (nrow(ligand_target_vis_filtered) > 1 & ncol(ligand_target_vis_filtered) >
1) {
distoi = dist(1 - cor(t(ligand_target_vis_filtered)))
hclust_obj = hclust(distoi, method = "ward.D2")
order_targets = hclust_obj$labels[hclust_obj$order]
distoi_targets = dist(1 - cor(ligand_target_vis_filtered))
hclust_obj = hclust(distoi_targets, method = "ward.D2")
order_ligands = hclust_obj$labels[hclust_obj$order]
}
else {
order_targets = rownames(ligand_target_vis_filtered)
order_ligands = colnames(ligand_target_vis_filtered)
}
vis_ligand_target_network = ligand_target_vis_filtered[order_targets,
order_ligands]
dim(vis_ligand_target_network) = c(length(order_targets),
length(order_ligands))
rownames(vis_ligand_target_network) = order_targets
colnames(vis_ligand_target_network) = order_ligands
return(vis_ligand_target_network)
}
runNicheNet <- function(genesOfInterest, nTopLigands,
expressed_genes_receiver,
expressed_genes_sender,
lr_network_original = lr_network_original){
geneset_oi <- genesOfInterest
background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
lr_network <- lr_network_original
lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na()
ligands = lr_network %>% pull(from) %>% unique()
expressed_ligands = intersect(ligands,expressed_genes_sender)
receptors = lr_network %>% pull(to) %>% unique()
expressed_receptors = intersect(receptors,expressed_genes_receiver)
lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors)
head(lr_network_expressed)
potential_ligands = lr_network_expressed %>% pull(from) %>% unique()
head(potential_ligands)
ligand_activities = predict_ligand_activities(geneset = geneset_oi,
background_expressed_genes = background_expressed_genes,
ligand_target_matrix = ligand_target_matrix,
potential_ligands = potential_ligands)
ligand_activities %>% arrange(-pearson)
best_upstream_ligands = ligand_activities %>% top_n(nTopLigands, pearson) %>% arrange(-pearson) %>% pull(test_ligand)
# Top 20 of most active ligands:
best_upstream_ligands
# threshold
p_hist_lig_activity = ggplot(ligand_activities, aes(x=pearson)) +
geom_histogram(color="black", fill="darkorange") +
# geom_density(alpha=.1, fill="orange") +
geom_vline(aes(xintercept=min(ligand_activities %>% top_n(nTopLigands, pearson) %>% pull(pearson))), color="red", linetype="dashed", size=1) +
labs(x="ligand activity (PCC)", y = "# ligands") +
theme_classic()
p_hist_lig_activity
### changed
# active_ligand_target_links_df = best_upstream_ligands %>% lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 250) %>% bind_rows()
active_ligand_target_links_df = best_upstream_ligands %>%
lapply(get_weighted_ligand_target_links,
geneset = geneset_oi,
ligand_target_matrix = ligand_target_matrix,
n = 250) %>%
bind_rows()
# added:
active_ligand_target_links_df <- active_ligand_target_links_df[!is.na(active_ligand_target_links_df$target),]
active_ligand_target_links <- prepare_ligand_target_visualization_kvdb(
ligand_target_df = active_ligand_target_links_df,
ligand_target_matrix = ligand_target_matrix,
cutoff = 0.25)
order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev()
order_targets = intersect(active_ligand_target_links_df$target, rownames(active_ligand_target_links)) %>% unique()
vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t()
p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot("Prioritized ligands","HBC* target genes", color = "purple",legend_position = "top", x_axis_position = "top",legend_title = "Regulatory potential") + scale_fill_gradient2(low = "whitesmoke", high = "purple", breaks = c(0,0.005,0.01)) + theme(axis.text.x = element_text(face = "italic"))
color <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100)
breaks <- seq(0, 0.01, length=100)
breaks <- c(breaks, 0.05)
annoRowDf <- data.frame(meanExp = log1p(rowMeans(counts[rownames(vis_ligand_target), clDatta=="HBC"])))
rownames(annoRowDf) <- rownames(vis_ligand_target)
ph <- pheatmap::pheatmap(vis_ligand_target, cluster_rows=FALSE, cluster_cols=FALSE,
col=color, breaks=breaks, border_color = NA,
annotation_row = annoRowDf)
return(ph)
}
length(deGenes) ## 5623
phList <- list()
for(kk in 1:5){
set.seed(kk)
genes <- sample(rownames(counts), 5000)
phList[[kk]] <- runNicheNet(genesOfInterest=genes, nTopLigands=20,
expressed_genes_receiver,
expressed_genes_sender,
lr_network_original)
print(phList[[kk]])
}
phList2k <- list()
for(kk in 1:3){
set.seed(kk)
genes <- sample(rownames(counts), 2000)
phList2k[[kk]] <- runNicheNet(genesOfInterest=genes, nTopLigands=20,
expressed_genes_receiver,
expressed_genes_sender,
lr_network_original)
print(phList2k[[kk]])
}
phList2k <- list()
for(kk in 1:6){
set.seed(kk)
genes <- sample(rownames(counts), 200)
phList2k[[kk]] <- runNicheNet(genesOfInterest=genes, nTopLigands=15,
expressed_genes_receiver,
expressed_genes_sender,
lr_network_original)
print(phList2k[[kk]])
}
runNicheNetPC <- function(genesOfInterest, nTopLigands,
expressed_genes_receiver,
expressed_genes_sender,
lr_network = lr_network_original){
geneset_oi <- genesOfInterest
background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
lr_network = lr_network %>% mutate(from = convert_human_to_mouse_symbols(from), to = convert_human_to_mouse_symbols(to)) %>% drop_na()
ligands = lr_network %>% pull(from) %>% unique()
expressed_ligands = intersect(ligands,expressed_genes_sender)
receptors = lr_network %>% pull(to) %>% unique()
expressed_receptors = intersect(receptors,expressed_genes_receiver)
lr_network_expressed = lr_network %>% filter(from %in% expressed_ligands & to %in% expressed_receptors)
potential_ligands = lr_network_expressed %>% pull(from) %>% unique()
ligand_activities = predict_ligand_activities(geneset = geneset_oi,
background_expressed_genes = background_expressed_genes,
ligand_target_matrix = ligand_target_matrix,
potential_ligands = potential_ligands)
return(ligand_activities)
}
resList <- list()
N <- 100
genes <- sample(rownames(counts), 200)
hlp <- runNicheNetPC(genesOfInterest=genes,
nTopLigands=15,
rownames(counts),
rownames(counts),
lr_network = lr_networkOrig)
ligandActivityNullMat <- matrix(NA, nrow=nrow(hlp), ncol=N)
rownames(ligandActivityNullMat) <- sort(hlp$test_ligand)
for(kk in 1:N){
set.seed(kk)
message(kk)
genes <- sample(rownames(counts), 200)
res <- runNicheNetPC(genesOfInterest=genes,
nTopLigands=15,
rownames(counts),
rownames(counts),
lr_network = lr_networkOrig)
ligandActivityNullMat[res$test_ligand, kk] <- res$pearson
}
saveRDS(ligandActivityNullMat, file = "../data/ligandActivityNullMat.rds")
ligandActivityNullMat <- readRDS("../data/ligandActivityNullMat.rds")
rafalib::mypar(mfrow=c(3,3))
pvalHBC <- vector(length = nrow(ligandActivityNullMat))
pvalAll <- vector(length = nrow(ligandActivityNullMat))
pccHBC <- pvalHBC
pccAll <- pvalAll
names(pvalHBC) <- names(pvalAll) <- rownames(ligandActivityNullMat)
for(kk in 1:nrow(ligandActivityNullMat)){
curLigand <- rownames(ligandActivityNullMat)[kk]
pccHBC[kk] <- ligand_activities_hbcAct_hbc$pearson[ligand_activities_hbcAct_all$test_ligand == curLigand]
pccAll[kk] <- ligand_activities_hbcAct_all$pearson[ligand_activities_hbcAct_all$test_ligand == curLigand]
hist(ligandActivityNullMat[kk,], breaks=40, xlim=c(min(ligandActivityNullMat), max(ligandActivityNullMat)), xlab="PCC", main=curLigand)
abline(v=pccHBC[kk], col="red", lwd=2)
abline(v=pccAll[kk], col="blue", lwd=2)
pvalHBC[kk] <- mean(ligandActivityNullMat[curLigand,] > pccHBC[kk])
pvalAll[kk] <- mean(ligandActivityNullMat[curLigand,] > pccAll[kk])
}
rafalib::mypar(mfrow=c(1,2))
plot(pccHBC, pccAll, pch=16, cex=2/3)
plot(x=rowMeans(cbind(pccHBC, pccAll)),
y=pccHBC - pccAll,
pch=16, cex=2/3) ; abline(v=0, lty=2, col="red")
# there is a big difference in p-value because there is a general mean shift
# in the PCC between both analyses...
hist(pvalAll)
hist(pvalHBC)
ligandActivityNullMat <- readRDS("../data/ligandActivityNullMat.rds")
ligand_activities_hbcAct_hbc$pearsonCentered <- ligand_activities_hbcAct_hbc$pearson - mean(ligand_activities_hbcAct_hbc$pearson)
ligand_activities_hbcAct_all$pearsonCentered <- ligand_activities_hbcAct_all$pearson - mean(ligand_activities_hbcAct_all$pearson)
rafalib::mypar(mfrow=c(3,3))
pvalCenteredHBC <- vector(length = nrow(ligandActivityNullMat))
pvalCenteredAll <- vector(length = nrow(ligandActivityNullMat))
pccCenteredHBC <- pvalHBC
pccCenteredAll <- pvalAll
names(pvalHBC) <- names(pvalAll) <- rownames(ligandActivityNullMat)
for(kk in 1:nrow(ligandActivityNullMat)){
curLigand <- rownames(ligandActivityNullMat)[kk]
pccCenteredHBC[kk] <- ligand_activities_hbcAct_hbc$pearsonCentered[ligand_activities_hbcAct_all$test_ligand == curLigand]
pccCenteredAll[kk] <- ligand_activities_hbcAct_all$pearsonCentered[ligand_activities_hbcAct_all$test_ligand == curLigand]
hist(ligandActivityNullMat[kk,], breaks=40, xlim=c(min(ligandActivityNullMat), max(ligandActivityNullMat)), xlab="PCC", main=curLigand)
abline(v=pccCenteredHBC[kk], col="red", lwd=2)
abline(v=pccCenteredAll[kk], col="blue", lwd=2)
pvalCenteredHBC[kk] <- mean(ligandActivityNullMat[curLigand,] > pccCenteredHBC[kk])
pvalCenteredAll[kk] <- mean(ligandActivityNullMat[curLigand,] > pccCenteredAll[kk])
}
rafalib::mypar(mfrow=c(1,2))
plot(pccCenteredHBC, pccCenteredAll, pch=16, cex=2/3)
plot(x=rowMeans(cbind(pccCenteredHBC, pccCenteredAll)),
y=pccCenteredHBC - pccCenteredAll,
pch=16, cex=2/3) ; abline(h=0, lty=2, col="red")
# there is a big difference in p-value because there is a general mean shift
# in the PCC between both analyses...
hist(pvalCenteredAll)
hist(pvalCenteredHBC)